import pickle
import os
import re


# Directory scanned for poem files (a path relative to the working directory).
poetry_folder = "poems-data"
# Only files whose names end with this suffix are read.
file_type = ".txt"
# A poem is kept only when its file contains exactly this many lines.
n_lines = 9

# Accumulates the accepted poems; each poem is a list of cleaned line strings.
poems = []

# NOTE(review): this listing is not referenced again in the visible code; the
# loop further down calls os.listdir itself. Confirm nothing else relies on it.
file_list = os.listdir(poetry_folder)


def process_line(line):
    """Decode one raw line of a poem file and normalise its spacing.

    Args:
        line: A UTF-8 encoded bytes object, typically one line read from a
            file opened in binary mode.

    Returns:
        The decoded text with leading and trailing whitespace (including the
        newline) stripped and every run of consecutive space characters
        collapsed into a single space. Other whitespace, such as tabs inside
        the line, is left untouched.

    Raises:
        UnicodeDecodeError: If ``line`` is not valid UTF-8.
    """
    text = line.decode('utf-8').strip()
    # Collapse any run of spaces (not just pairs) down to one space.
    return re.sub(' +', ' ', text)


# Read every matching file in poetry_folder, keep the poems that have exactly
# n_lines lines, report the rest, and pickle the accepted poems.
#
# os.listdir returns names in arbitrary order, so the listing is sorted to
# make the order of poems in the pickle reproducible across runs and
# filesystems.
for filename in sorted(os.listdir(poetry_folder)):
    if not filename.endswith(file_type):
        continue
    # Binary mode: process_line performs the UTF-8 decoding itself.
    with open(os.path.join(poetry_folder, filename), 'rb') as file:
        print(filename)
        poem = [process_line(line) for line in file]
    # Every physical line counts, including blank ones, so a stray trailing
    # blank line makes the poem fail this check.
    if len(poem) != n_lines:
        print("wrong number of lines: " + str(len(poem)))
    else:
        poems.append(poem)

print("found " + str(len(poems)) + " poems")

# A context manager guarantees the output file is flushed and closed; the
# previous bare open() left that to garbage collection.
with open("pickles/poems.pcl", "wb") as pickle_file:
    pickle.dump(poems, pickle_file)
